global data ""  // path for source data
global temp ""  // path for intermediary data
global final ""  // path for final replication sample
global tables ""  // path for exporting tables
global figures ""  // path for exporting figures

/*
This do file constructs famine intensity variable, using the rate of cohort loss
(projected from 1990 and 2000 Census) and thermal agricultural productivity 
(calculated from weather station data). It also includes the replication code 
for Figures 1, S1-S2 and Table S3.

Input: 
	wthr-19502012.dta: daily instrumental temperature records of 854 weather 
		stations covering years 1950-2012, China Meteorological Administration;
	County_GeoInfo.dta: Coordinates of 2870 counties;
	Station_GeoInfo.dta: Coordinates of 793 weather stations;
	Census90.dta: 1990 Population Census;
	Census00.dta: 2000 Population Census;
	match_90_00.dta: Manually compiled match of the county IDs in 1990 and 2000;
	match_00_ID.dta: Manually compiled match of the county IDs in 2000 and the 
		IDs of the counties in China_coord.dta;
	China_coord.dta: Coordinates of 2876 county boundaries, to plot Figure 1.

Output:
	famine-intensity-estcy.dta: Predicted cohort loss rate based on thermal 
		agricultural productivity by county-year for 2,727 counties covering 
		years 1951-1970;
	famine-intensity-estc.dta: Famine intensity for 2,727 counties;
	Weather_sample_1.dta, Weather_sample_2.dta: To replicate Table S2.
*/


/*------------------------------------------------------------------------------
Clean the weather data and construct thermal agriculture productivity measure
Part 1: Primary construction according to Ritchie and NeSmith (1991) and 
alternative construction according to Deschenes and Greenstone (2007), based on 
April-September planting season.
------------------------------------------------------------------------------*/

use "$data/wthr-19502012.dta", clear

keep if year >= 1949 & year <= 1970 
keep if month >= 4 & month <= 9

drop if prec==. | avgtemp==.
drop high low ssd noaa

bysort station year: gen count = _N
keep if count == 183 
drop count
	
gen degree  = .
replace degree  = 0 if avgtemp <=8 
replace degree  = avgtemp - 8 if avgtemp>8 & avgtemp<=32 
replace degree  = 24 if avgtemp>32 
	
gen degree1 = 0 
replace degree1 = avgtemp - 8 if avgtemp>8 & avgtemp<=33 
replace degree1 = 3.125*(41-avgtemp) if avgtemp>33 & avgtemp<41   
	
foreach i in degree degree1  {
	bys stationid year: egen `i'm=total(`i')
	replace `i'=`i'm
	drop `i'm
}

duplicates drop stationid year, force
	 
keep stationid year degree*

label var degree "Agricultural temperature"
label var degree1 "Agricultural temperature"
	
save "$temp/Weather_main.dta", replace

/*------------------------------------------------------------------------------
Figure S2 and Preparation for Table S2
------------------------------------------------------------------------------*/
use "$temp/Weather_main.dta", clear
gen degree1_cur = degree1

xtset stationid year
bysort year: egen degree_ttl = mean(degree1_cur)
su degree_ttl
global degavg = r(mean)

twoway (scatter degree_ttl year, c(l) sort m(O) mc(1)) , $gropt ytitle("Degree days") ///
	yline($degavg) yscale(r(1900 2400)) ylab(1900(100)2400, labsize(small))
graph export "$figures/FigureS2.png" , as(png) replace 

use "$temp/Weather_main.dta", clear
sort stationid year
tsset stationid year
tsfill
gen degree1_cur = degree1/1000
by stationid (year): gen degree1_lag1 = l.degree1_cur
by stationid (year): gen degree1_lag2 = l.degree1_lag1
label var degree1_cur "Thermal agricultural productivity"
label var degree1_lag1 "Thermal agricultural productivity (t-1)"
label var degree1_lag2 "Thermal agricultural productivity (t-2)"
drop degree
save "$final/Weather_sample_1.dta", replace

use "$temp/Weather_main.dta", clear
sort stationid year
tsset stationid year
tsfill
gen degree1_cur = degree1/1000
by stationid (year): gen degree1_lag1 = l.degree1_cur
gen degree1_lag1_diff = degree1_cur - degree1_lag1

gen famine_period = (year==1959 | year==1960 | year==1961)
collapse (mean) avg_degree = degree1_cur, by(stationid famine_period)
reshape wide avg_degree , i(stationid) j(famine_period)
gen avg_degree_diff = avg_degree1 - avg_degree0

label var avg_degree_diff "Change in thermal agricultural productivity"
label var avg_degree0 "Thermal agricultural productivity in normal years"
drop avg_degree1
save "$final/Weather_sample_2.dta", replace

/*------------------------------------------------------------------------------
Clean the weather data and construct thermal agriculture productivity measure
Part 2: Primary construction according to Ritchie and NeSmith (1991), based on 
all months of the year.
------------------------------------------------------------------------------*/
use "$data/wthr-19502012.dta", clear

keep if year >= 1949 & year <= 1970 

drop if prec==. | avgtemp==.
drop high low ssd noaa

bysort station year: gen count = _N
tab count
keep if count >= 365
drop count

gen degree1 = 0 
replace degree1 = avgtemp - 8 if avgtemp>8 & avgtemp<=33 
replace degree1 = 3.125*(41-avgtemp) if avgtemp>33 & avgtemp<41   
	
foreach i in degree1  {
	bys stationid year: egen `i'm=total(`i')
	replace `i'=`i'm
	drop `i'm
}

duplicates drop stationid year, force

keep stationid year degree*

label var degree1 "Agricultural temperature (all months)"
rename degree1 degree1_a
	
keep stationid year degree1_a
	
save "$temp/Weather_all_months.dta", replace
	

/*------------------------------------------------------------------------------
Clean the weather data and construct thermal agriculture productivity measure
Part 3: Primary construction according to Ritchie and NeSmith (1991), based on 
region-specific planting season.
------------------------------------------------------------------------------*/


use "$data/wthr-19502012.dta", clear

gen agri_window = 1 if province == "LIAONING" | province == "JILIN" | ///
	province == "HEILONGJIANG"
replace agri_window = 2 if province == "HEBEI" | province == "SHAANXI" | ///
	province == "SHANXI" | province == "GANSU" | province == "NINGXIA" | ///
	province == "INNER MONGOLIA" | province == "SHANDONG" | province == "HEBEI" | ///
	province == "HENAN" | province == "BEIJING" | province == "TIANJIN" | ///
	province == "TIBET" | province == "QINGHAI" | province == "XINJIANG"
replace agri_window = 3 if province == "HUBEI" | province == "ANHUI" | ///
	province == "JIANGSU" | province == "HUNAN" | province == "JIANGXI" | ///
	province == "ZHEJIANG" | province == "FUJIAN" | province == "SICHUAN" | ///
	province == "GUANGDONG" | province == "GUANGXI" | province == "YUNNAN" | ///
	province == "GUIZHOU" | province == "CHONGQING" | province == "HAINAN" | ///
	province == "SHANGHAI" 

label define window_lbl 1 "Northeast (May-Sep)" 2  ///
	"North of Huai/Qinling (Apr-Oct)" 3 "South of Huai/Qinling (Mar-Nov)"
label values agri_window window_lbl

keep if year >= 1949 & year <= 1970
keep if (agri_window == 1 & month >= 5 & month <= 9) | ///
	(agri_window == 2 & month >= 4 & month <= 10) | ///
	(agri_window == 3 & month >= 3 & month <= 11) 

drop if prec==. | avgtemp==.
drop high low ssd noaa

bysort station year: gen cnt = _N
tab cnt if agri_window == 1
tab cnt if agri_window == 2
tab cnt if agri_window == 3
keep if (agri_window == 1 & cnt == 153) | (agri_window == 2 & cnt == 214) ///
	| (agri_window == 3 & cnt == 275)
drop cnt

gen degree1m = 0  
replace degree1m = avgtemp - 8 if avgtemp>8 & avgtemp<=33 
replace degree1m = 3.125*(41-avgtemp) if avgtemp>33 & avgtemp<41   
	
foreach i in degree1m  {
	bys stationid year: egen `i'm=total(`i')
	replace `i'=`i'm
	drop `i'm
}

duplicates drop stationid year, force

keep stationid year degree*

label var degree1m "Agricultural temperature [non-mono]"
	
save "$temp/Weather_m.dta", replace

/*------------------------------------------------------------------------------
Clean the weather data and construct thermal agriculture productivity measure
Part 4: Calculate thermal agricultural productivity using weighted average of 
temperatures from three nearest stations, inversely weighted by distance.
------------------------------------------------------------------------------*/
* Match county and the nearest three weather stations
	
use "$data/County_GeoInfo.dta", clear
cross using "$data/Station_GeoInfo"

geodist cty_lat cty_lon stn_lat stn_lon, gen(distance)
sort uid2000 distance stationid
by uid2000: keep if _n<=3
	
gen dis_2 = distance^(-2)
by uid2000: egen dis_sum = sum(dis_2)
gen weight = dis_2/dis_sum
	
sort uid2000 distance stationid
by uid: gen station1 = stationid if _n == 1
by uid: gen distance1 = distance if _n == 1
by uid: gen weight1 = weight if _n == 1
by uid: gen station2 = stationid if _n == 2
by uid: gen distance2 = distance if _n == 2
by uid: gen weight2 = weight if _n == 2
by uid: gen station3 = stationid if _n == 3
by uid: gen distance3 = distance if _n == 3
by uid: gen weight3 = weight if _n == 3
by uid: replace station1 = station1[_n-1] if missing(station1)
by uid: replace weight1 = weight1[_n-1] if missing(weight1)
by uid: replace distance1 = distance1[_n-1] if missing(distance1)
by uid: replace station2 = station2[_n-1] if missing(station2)
by uid: replace weight2 = weight2[_n-1] if missing(weight2)
by uid: replace distance2 = distance2[_n-1] if missing(distance2)
by uid: keep if _n == 3
	
keep uid2000 station1 distance1 weight1 station2 distance2 weight2 station3 distance3 weight3
	
gen year=1951
expand 2,gen(temp)
replace year=year+temp
drop temp
forvalues x=1/18 {
	expand 2 if year==1951+`x',gen(temp_`x')
	replace year=1952+`x' if temp_`x'==1
	drop temp_`x'
}

order uid2000 year 
	
* Inversely weighted by distance
	
rename station1 stationid
merge m:1 stationid year using "$temp/Weather_main"
drop if _m == 2
drop _m
drop degree 
rename stationid station1
rename degree1 degree1_nonmono
	
rename station2 stationid
merge m:1 stationid year using "$temp/Weather_main"
drop if _m == 2
drop _m
drop degree 
rename stationid station2
rename degree1 degree2_nonmono
	
rename station3 stationid
merge m:1 stationid year using "$temp/Weather_main"
drop if _m == 2
drop _m
drop degree 
rename stationid station3
rename degree1 degree3_nonmono
	
sort uid year

gen dis1_2 = distance1^(-2) if !missing(degree1)
gen dis2_2 = distance2^(-2) if !missing(degree2)
gen dis3_2 = distance3^(-2) if !missing(degree3)
gen dis_sum = dis1 + dis2 + dis3
replace dis_sum = dis1 + dis2 if missing(dis3)
replace dis_sum = dis1 + dis3 if missing(dis2)
replace dis_sum = dis2 + dis3 if missing(dis1)
replace dis_sum = dis1 if missing(dis2) & missing(dis3)
replace dis_sum = dis2 if missing(dis1) & missing(dis3)
replace dis_sum = dis3 if missing(dis1) & missing(dis2)
gen wt1 = dis1_2/dis_sum
gen wt2 = dis2_2/dis_sum
gen wt3 = dis3_2/dis_sum
drop weight*
gen wdegree = wt1*degree1 + wt2*degree2 + wt3*degree3
replace wdegree = wt1*degree1 + wt2*degree2 if missing(dis3)
replace wdegree = wt1*degree1 + wt3*degree3 if missing(dis2)
replace wdegree = wt2*degree2 + wt3*degree3 if missing(dis1)
replace wdegree = wt1*degree1 if missing(dis2) & missing(dis3)
replace wdegree = wt2*degree2 if missing(dis1) & missing(dis3)
replace wdegree = wt3*degree3 if missing(dis1) & missing(dis2)
	
rename wdegree wdegree_nonmono
label var wdegree "Weighted Agricultural Temperature"
keep uid2000 year wdegree_nonmono
save "$temp/Weather_station_weighted.dta", replace
	

/*------------------------------------------------------------------------------
Clean the weather data and construct thermal agriculture productivity measure
Part 5: Match county to the nearest weather station
------------------------------------------------------------------------------*/
use "$data/County_GeoInfo.dta", clear
cross using "$data/Station_GeoInfo"

geodist cty_lat cty_lon stn_lat stn_lon, gen(distance)
sort uid2000 distance stationid
by uid2000: keep if _n==1
	
keep uid2000 stationid distance
save "$temp/match_station.dta", replace


/*------------------------------------------------------------------------------
Clean the weather data and construct thermal agriculture productivity measure
Part 6: Combine all measures of thermal agriculture productivity
------------------------------------------------------------------------------*/
use "$temp/match_station.dta", clear
drop if uid2000==. 
	
gen year=1951
expand 2,gen(temp)
replace year=year+temp
drop temp
forvalues x=1/18 {
	expand 2 if year==1951+`x',gen(temp_`x')
	replace year=1952+`x' if temp_`x'==1
	drop temp_`x'
}

keep uid2000 year stationid distance
order uid2000 year stationid distance
	
merge m:1 stationid year using "$temp/Weather_main"
drop if _m==2
drop _m

keep uid2000 stationid year degree*

*** all month's degree ***

merge m:1 stationid year using "$temp/Weather_all_months.dta"
drop if _m == 2
drop _m

*** region specific degree ***

merge m:1 stationid year using "$temp/Weather_m.dta"
drop if _m == 2
drop _m

*** weighted average of three stations ***

merge 1:1 uid2000 year using "$temp/Weather_station_weighted.dta"
drop _m

keep uid2000 year stationid degree* wdegree

save "$temp/Weather.dta", replace

/*------------------------------------------------------------------------------
Construct rate of cohort loss using the 1990 and 2000 Census
Part 1: Observed size of cohort in 1990 and 2000 Census
------------------------------------------------------------------------------*/
* census 2000

use "$data/Census00.dta", clear 
keep id r081 r041 r03 
format id %20.0g
	
drop if r041<1949
drop if r041>1970

keep if r081==1 
drop r081

gen uid2000=int(id/(10^12))
drop id	
	
rename r041 year
labe var year "Year"	
	
gen byte count = 1 
gen byte count_m = 1 if r03 == 1
collapse (sum) n_2000 = count m_2000 = count_m, by(uid2000 year) 
	
gen f_2000 = n_2000 - m_2000
	
label var n_2000 "Cohort size (2000)"
label var m_2000 "Cohort size (male)"
label var f_2000 "Cohort size (female)" 
	
xtset uid2000 year 
tsfill
count if n==.
su n, d

save "$temp/temp1.dta",replace
clear

* census 1990

use "$data/Census90.dta", clear
keep id r03 r041 r061 r071
format id %20.0g
gen uid1990=int(id/(10^12))
drop id	
rename r03 sex
rename r041 birthyr
rename r061 reside
rename r071 reside85
	
replace birthyr=birthyr+1000
drop if birthyr<1949
drop if birthyr>1970
rename birthyr year
label var year "Year"
	
keep if reside85 == 1 & reside == 1 
drop reside85 reside 
	
gen byte count = 1 
gen byte count_m = 1 if sex == 1
collapse (sum) n_1990 = count m_1990 = count_m , by(uid1990 year) 
gen f_1990 = n_1990 - m_1990
	
label var n_1990 "Cohort size (1990)"
label var m_1990 "Cohort size (male)"
label var f_1990 "Cohort size (female)" 
	
xtset uid1990 year 
tsfill
count if n==.
su n, d

save "$temp/temp2",replace
clear
	
* merge the data sets
use "$data/match_90_00", clear
drop if uid1990==.
merge 1:m uid1990 using "$temp/temp2"
keep if _m==3
drop _m
save "$temp/temp3",replace
	
use "$data/match_90_00", clear
merge 1:m uid2000 using "$temp/temp1"
keep if  _merge==3
drop _merge
save "$temp/temp4",replace

merge 1:1 uid2000 year using "$temp/temp3"	
drop _merge	
	
foreach i in 2000 1990{
	gen x=(n_`i'==.)
	bys uid2000: egen xx=max(x)  
	bys uid2000: gen count = _N 
	replace xx = 1 if count != 22 
	replace n_`i'=. if xx == 1
	replace f_`i'=. if xx == 1 
	replace m_`i'=. if xx == 1  
	drop x xx count 
}
		
gen n_9000=n_2000+n_1990
replace n_9000=. if change==1 
label var n_9000 "Observed cohort size (1990+2000)"
	
gen m_9000 = m_2000 + m_1990
replace m_9000 = . if change==1 
label var m_9000 "Observed male cohort size (1990+2000)"
	
gen f_9000 = f_2000 + f_1990
replace f_9000 = . if change==1 
label var f_9000 "Observed female cohort size (1990+2000)"
	
save "$temp/temp5",replace

/*------------------------------------------------------------------------------
Construct rate of cohort loss using the 1990 and 2000 Census
Part 2: Figure S1
------------------------------------------------------------------------------*/
use "$temp/temp5", clear
keep if n_1990~=.
keep uid2000 year n_1990

xtset uid2000 year
bysort year: egen popn_ttl = sum(n_1990)
reg popn_ttl year if year < 1958 | year > 1962
predict pred_ttl, xb

twoway (scatter popn_ttl year, c(l) sort m(O) mc(1)) ///
	(scatter pred_ttl year, c(l) sort m(D) mc(gs10)) ///
	, legend(lab(1 "Actual population") lab(2 "Linear projection")) ///
	$gropt ytitle("") ylab(100000(50000)300000, labsize(small))
graph export "$figures/FigureS1_population.png" , as(png) replace
clear
	
/*------------------------------------------------------------------------------
Construct rate of cohort loss using the 1990 and 2000 Census
Part 3: Rate of cohort loss by projection
------------------------------------------------------------------------------*/
* linear projection
foreach i in n_2000 n_1990 n_9000 m_2000 m_1990 m_9000 f_2000 f_1990 f_9000 {
	use "$temp/temp5", clear
	keep if `i'~=.
	gen pred=.
	quietly levelsof uid2000, local(counties) 
	
	foreach x of local counties {	
		quietly reg `i' year if (year<=1957 | year>1962) & uid2000==`x'
		predict temp_pred,xb
		replace pred=temp_pred if uid2000==`x'
		drop temp_pred
	}
	
	gen death_`i'=(pred-`i')/pred
	keep uid2000 year `i' death_`i' 
	save "$temp/temp_`i'", replace
	clear
}
	
* quadatic projection 
foreach i in n_1990 m_1990 f_1990 {
	use "$temp/temp5", clear
	keep if `i'~=.
	gen pred=.
	gen year2 = year*year
	quietly levelsof uid2000, local(counties) 
	
	foreach x of local counties {	
		quietly reg `i' year2 if (year<=1957 | year>1962) & uid2000==`x'
		predict temp_pred,xb
		replace pred=temp_pred if uid2000==`x'
		drop temp_pred
	}
	
	gen death_`i'_q =(pred-`i')/pred
	keep uid2000 year `i' death_`i'_q
	save "$temp/temp_`i'_q", replace
	clear
}
	
* exponential projection 
foreach i in n_1990 m_1990 f_1990 {
	use "$temp/temp5", clear
	keep if `i'~=.
	gen pred=.
	gen yeare = exp(year/100)
	quietly levelsof uid2000, local(counties) 
	
	foreach x of local counties {	
		quietly reg `i' yeare if (year<=1957 | year>1962) & uid2000==`x'
		predict temp_pred,xb
		replace pred=temp_pred if uid2000==`x'
		drop temp_pred
	}
	
	gen death_`i'_e =(pred-`i')/pred
	keep uid2000 year `i' death_`i'_e 
	save "$temp/temp_`i'_e", replace
	clear
}
	
/*------------------------------------------------------------------------------
Construct rate of cohort loss using the 1990 and 2000 Census
Part 4: Combine all datasets
------------------------------------------------------------------------------*/

use "$temp/temp_n_2000", clear
	
foreach var in n_1990 n_9000 m_2000 m_1990 m_9000 f_2000 f_1990 f_9000 ///
	n_1990_q m_1990_q f_1990_q n_1990_e m_1990_e f_1990_e {
	quietly merge 1:1 uid2000 year using "$temp/temp_`var'" 
	drop _merge
} 
	
label var death_n_2000 "Cohort loss rate (2000)"
label var death_n_1990 "Cohort loss rate (1990)"
label var death_n_9000 "Cohort loss rate (1990+2000)"
label var death_m_2000 "Male cohort loss rate (2000)"
label var death_m_1990 "Male cohort loss rate (1990)"
label var death_m_9000 "Male cohort loss rate (1990+2000)"
label var death_f_2000 "Female cohort loss rate (2000)"
label var death_f_1990 "Female cohort loss rate (1990)"
label var death_f_9000 "Female cohort loss rate (1990+2000)"
label var death_n_1990_q "Cohort loss rate (1990) (quadratic projection)"
label var death_m_1990_q "Male cohort loss rate (1990) (quadratic projection)"
label var death_f_1990_q "Female cohort loss rate (1990) (quadratic projection)"
label var death_n_1990_e "Cohort loss rate (1990) (exponential projection)"
label var death_m_1990_e "Male cohort loss rate (1990) (exponential projection)"
label var death_f_1990_e "Female cohort loss rate (1990) (exponential projection)"
	
save "$temp/Death.dta", replace
clear

* erase datasets

forvalues i=1/5 {
	erase "$temp/temp`i'.dta"
}


foreach var in n_1990 n_9000 m_2000 m_1990 m_9000 f_2000 f_1990 f_9000 ///
	n_1990_q m_1990_q f_1990_q n_1990_e m_1990_e f_1990_e {
	erase "$temp/temp_`var'.dta"
} 


/*------------------------------------------------------------------------------
Constuct measures of famine intensity
Part 1: Combine thermal agricultural productivity and rate of cohort loss
------------------------------------------------------------------------------*/

use "$temp/Weather.dta", clear

merge 1:1 uid2000 year using "$temp/Death.dta"
keep if _m == 3 
drop _merge
	
drop n_* f_* m_*

xtset uid2000 year

replace degree1 = degree1/1000
label var degree1 "Agricultural temperature '000 [non-mono]"
replace degree = degree/1000
label var degree "Agricultural temperature '000 [mono]"
replace degree1_a = degree1_a/1000
label var degree1_a "Agricultural temperature '000 (all months)"
replace wdegree = wdegree/1000
label var wdegree "Agricultural temperature '000 (weighted)"
replace degree1m = degree1/1000
label var degree1m "Agricultural temperature '000 [non-mono m]"

gen l_degree1=l.degree1
label var l_degree1 "Agricultural temperature '000 (t-1) [non-mono]"
gen l_degree = l.degree
label var l_degree "Agricultural temperature '000 (t-1) [mono]"
gen l_degree1_a = l.degree1_a
label var l_degree1_a "Agricultural temperature '000 (t-1) (all months)"
gen l_wdegree = l.wdegree 
label var l_wdegree "Agricultural temperature '000 (t-1) (weighted)"
gen l_degree1m=l.degree1m
label var l_degree1m "Agricultural temperature '000 (t-1) [non-mono m]"

gen period = (year==1959 | year==1960 | year==1961)
bysort uid2000 period (l_degree1): gen cty_flag = 1 if _n ==1 & !mi(l_degree1)
bysort uid2000: egen periods = total(cty_flag)  

forvalues i=1951/1970 {
	g year_`i'=(year==`i')
	label var year_`i' "Year-`i'"
}

forvalues j=1951/1970 {
	g l_degree1_year_`j'=l_degree1*year_`j'
	label var l_degree1_year_`j' "Agricultural temperature (t-1)*Year-`j'"
	gen l_degree_year_`j' = l_degree*year_`j'
	label var l_degree_year_`j' "Agricultural temperature (t-1)*Year-`j' [mono]"
	gen l_degree1_a_year_`j' = l_degree1_a*year_`j'
	label var l_degree1_a_year_`j' "Agricultural temperature (t-1)*Year-`j' (all months)"
	gen l_wdegree_year_`j' = l_wdegree*year_`j'
	label var l_wdegree_year_`j' "Agricultural temperature (t-1)*Year-`j' (weighted)"
	g l_degree1m_year_`j'=l_degree1m*year_`j'
	label var l_degree1m_year_`j' "Agricultural temperature (t-1)*Year-`j' [m]"
}

/*------------------------------------------------------------------------------
Constuct measures of famine intensity
Part 2: Table S3 and predicted cohort loss rate (based on thermal agricultural 
productivity) by county-year
------------------------------------------------------------------------------*/

global d_year year_1953 - year_1970
global l_degree1_d_year l_degree1_year_1953 l_degree1_year_1954 l_degree1_year_1955 ///
	l_degree1_year_1956 l_degree1_year_1957 l_degree1_year_1958 l_degree1_year_1959 ///
	l_degree1_year_1960 l_degree1_year_1961 l_degree1_year_1962 l_degree1_year_1963 ///
	l_degree1_year_1964 l_degree1_year_1965 l_degree1_year_1966 l_degree1_year_1967 ///
	l_degree1_year_1968 l_degree1_year_1969 l_degree1_year_1970

global outfile "$tables/TableS3_famine_thermal.xls"
global sample periods == 2 
global options fe cluster(uid2000)
global btopt bootstrap, force reps(100) seed(1)

quietly xtreg death_n_1990 l_degree1 $d_year $l_degree1_d_year if $sample , $options vce($btopt)
predict est_death_rate_n_1990, xb
label var est_death_rate_n_1990 "Predicted cohort loss rate (n_1990) [non-mono]"
outreg2 using "$outfile", replace  bdec(3) sdec(3) ctitle("n_1990") label ///
	addnote(Robust s.e. clustered at county level) 

foreach i in n_2000 n_9000 {
	quietly xtreg death_`i' l_degree1 $d_year $l_degree1_d_year if $sample , $options vce($btopt)
	predict est_death_rate_`i', xb
	label var est_death_rate_`i' "Predicted cohort loss rate (`i') [non-mono]"
	outreg2 using "$outfile", append  bdec(3) sdec(3) ctitle("`i'") label
}

* Alternative construction of predicted cohort loss rate

foreach i in f_1990 m_1990 f_2000 m_2000 f_9000 m_9000 n_1990_q f_1990_q m_1990_q ///
	n_1990_e f_1990_e m_1990_e {
	quietly xtreg death_`i' l_degree1 $d_year $l_degree1_d_year if $sample , $options vce($btopt)
	predict est_death_rate_`i', xb
	label var est_death_rate_`i' "Predicted cohort loss rate (`i') [non-mono]"
}

global l_degree_d_year l_degree_year_1953 l_degree_year_1954 l_degree_year_1955 ///
	l_degree_year_1956 l_degree_year_1957 l_degree_year_1958 l_degree_year_1959 ///
	l_degree_year_1960 l_degree_year_1961 l_degree_year_1962 l_degree_year_1963 ///
	l_degree_year_1964 l_degree_year_1965 l_degree_year_1966 l_degree_year_1967 ///
	l_degree_year_1968 l_degree_year_1969 l_degree_year_1970
global l_degree1_a_d_year l_degree1_a_year_1953 l_degree1_a_year_1954 ///
	l_degree1_a_year_1955 l_degree1_a_year_1956 l_degree1_a_year_1957 ///
	l_degree1_a_year_1958 l_degree1_a_year_1959 l_degree1_a_year_1960 ///
	l_degree1_a_year_1961 l_degree1_a_year_1962 l_degree1_a_year_1963 ///
	l_degree1_a_year_1964 l_degree1_a_year_1965 l_degree1_a_year_1966 ///
	l_degree1_a_year_1967 l_degree1_a_year_1968 l_degree1_a_year_1969 ///
	l_degree1_a_year_1970
global l_wdegree_d_year l_wdegree_year_1953 l_wdegree_year_1954 ///
	l_wdegree_year_1955 l_wdegree_year_1956 l_wdegree_year_1957 ///
	l_wdegree_year_1958 l_wdegree_year_1959 l_wdegree_year_1960 ///
	l_wdegree_year_1961 l_wdegree_year_1962 l_wdegree_year_1963 ///
	l_wdegree_year_1964 l_wdegree_year_1965 l_wdegree_year_1966 ///
	l_wdegree_year_1967 l_wdegree_year_1968 l_wdegree_year_1969 ///
	l_wdegree_year_1970
global l_degree1m_d_year l_degree1m_year_1953 l_degree1m_year_1954 ///
	l_degree1m_year_1955 l_degree1m_year_1956 l_degree1m_year_1957 ///
	l_degree1m_year_1958 l_degree1m_year_1959 l_degree1m_year_1960 ///
	l_degree1m_year_1961 l_degree1m_year_1962 l_degree1m_year_1963 ///
	l_degree1m_year_1964 l_degree1m_year_1965 l_degree1m_year_1966 ///
	l_degree1m_year_1967 l_degree1m_year_1968 l_degree1m_year_1969 ///
	l_degree1m_year_1970


foreach i in n_1990 f_1990 m_1990 {
	quietly xtreg death_`i' l_degree $d_year $l_degree_d_year if $sample, $options vce($btopt)
	predict est_death_rate_`i'_mono, xb
	label var est_death_rate_`i'_mono "Predicted cohort loss rate (`i') [mono]"
	
	quietly xtreg death_`i' l_degree1_a $d_year $l_degree1_a_d_year if $sample, $options vce($btopt)
	predict est_death_rate_`i'_a, xb
	label var est_death_rate_`i'_a "Predicted cohort loss rate (`i') (all months)"
	
	quietly xtreg death_`i' l_wdegree $d_year $l_wdegree_d_year if $sample, $options vce($btopt)
	predict est_death_rate_`i'_w, xb
	label var est_death_rate_`i'_w "Predicted cohort loss rate (`i') (weighted)"

	quietly xtreg death_`i' l_degree1m $d_year $l_degree1m_d_year if $sample, $options vce($btopt)
	predict est_death_rate_`i'_m, xb
	label var est_death_rate_`i'_m "Predicted cohort loss rate (`i') (region-specifc)"
}

keep uid2000 year est_death_rate*
save "$final/famine-intensity-estcy.dta", replace

/*------------------------------------------------------------------------------
Construct measures of famine intensity
Part 3: Construct famine intensity at county level by taking the difference in 
predicted cohort loss rate between famine and normal periods 
------------------------------------------------------------------------------*/

gen period = (year==1959 | year==1960 | year==1961)
collapse (mean) est_death_rate*, by(uid period)

foreach i in n_1990 f_1990 m_1990 n_2000 f_2000 m_2000 n_9000 f_9000 m_9000 n_1990_q f_1990_q m_1990_q n_1990_e f_1990_e m_1990_e {
	replace est_death_rate_`i' = -est_death_rate_`i' if period==0
}
foreach i in n_1990 f_1990 m_1990 {
	replace est_death_rate_`i'_mono = -est_death_rate_`i'_mono if period==0

	replace est_death_rate_`i'_a = -est_death_rate_`i'_a if period==0

	replace est_death_rate_`i'_w = -est_death_rate_`i'_w if period==0

	replace est_death_rate_`i'_m = -est_death_rate_`i'_m if period==0

}

collapse (sum) est_death_rate_* , by(uid)
foreach i in n_1990 f_1990 m_1990 n_2000 f_2000 m_2000 n_9000 f_9000 m_9000 n_1990_q f_1990_q m_1990_q n_1990_e f_1990_e m_1990_e {
	rename est_death_rate_`i' rltv_death_rate_`i'
	label var rltv_death_rate_`i' "Constructed famine intensity (`i') [non-mono]"
}
foreach i in n_1990 f_1990 m_1990 {
	rename est_death_rate_`i'_mono rltv_death_rate_`i'_mono
	label var rltv_death_rate_`i'_mono "Constructed famine intensity (`i') [mono]"
	
	rename est_death_rate_`i'_a rltv_death_rate_`i'_a
	label var rltv_death_rate_`i'_a "Constructed famine intensity (`i') (all months)"
	
	rename est_death_rate_`i'_w rltv_death_rate_`i'_w
	label var rltv_death_rate_`i'_w "Constructed famine intensity (`i') (weighted)"
	
	rename est_death_rate_`i'_m rltv_death_rate_`i'_m
	label var rltv_death_rate_`i'_m "Constructed famine intensity (`i') (region-specific)"
}

save "$final/famine-intensity-estc.dta", replace

/*------------------------------------------------------------------------------
Figure 1: Plotting famine intensity
------------------------------------------------------------------------------*/
use "$final/famine-intensity-estc.dta", clear

gen prov = floor(uid2000/10000)

xtile quintile = rltv_death_rate_n_1990, nq(5)
merge 1:m uid2000 using "$data/match_00_ID"
drop _m

spmap quintile using "$data/China_coord.dta", id(_ID) ///
	clbreaks(0.5 1.5 2.5 3.5 4.5 5.5) fcolor(Reds) ndfcolor(gs15) clmethod(custom)
graph export "$figures/Figure1_China.pdf", as(pdf) replace

spmap quintile using "$data/China_coord.dta" if prov==51, id(_ID) ///
	clbreaks(0.5 1.5 2.5 3.5 4.5 5.5) fcolor(Reds) ndfcolor(gs15) clmethod(custom) 
graph export "$figures/Figure1_Sichuan.pdf", as(pdf) replace
